## R Script Output -------------------------------------------------------------
# Appendix Figure A2: Top Five Origin-Countries of International Students in the United States between 2000 and 2016.


## Instructions ----------------------------------------------------------------
# Step 1: Set MAIN_DIR (in the setup section below) to the folder that contains README.txt, keeping the trailing "/"
# Step 2: Run entire script


## setup -----------------------------------------------------------------------
# clean slate: remove every object from the global environment
# (attached packages and options are not affected by this)
rm(list = ls())
date()

# load packages
pkg <- c("countrycode",
         "tidyverse",
         "RColorBrewer",
         "gridExtra",
         "viridis")

# library() stops with an error that names the missing package; require()
# would only return FALSE and let the script fail later on an unrelated line
invisible(lapply(pkg, library, character.only = TRUE))

# set main directory (the folder that contains README.txt)
MAIN_DIR <- "~/Dropbox/Research/ISQ-frei-replication/"


## load data -------------------------------------------------------------------
# foia-zcta.RData is expected to provide the foia.zcta data frame used below.
# The path is built with file.path() after trimming any trailing separator,
# so MAIN_DIR works with or without a final "/".
load(file = file.path(sub("[/\\\\]+$", "", MAIN_DIR), "foia-zcta.RData"))


## Figure A2 -------------------------------------------------------------------
# reshape to long format: one row per zip code (zcta), country, year and
# student level, where the levels are Secondary, Undergraduate, Graduate and
# their row-wise sum, Total
foia.c.z.l.y <- foia.zcta %>%
  as_tibble() %>%
  rename(Secondary = secondary,
         Undergraduate = undergraduate,
         Graduate = graduate) %>%
  # Total is a plain row-wise sum, so no grouping is needed to compute it
  mutate(Total = Secondary + Undergraduate + Graduate) %>%
  # every column other than the four identifiers is stacked into `n`;
  # rows come out interleaved by level rather than stacked level after level,
  # which the aggregation that follows does not depend on
  pivot_longer(cols = -c(zcta, year, cty, iso3n),
               names_to = "student_level",
               values_to = "n")

# collapse zip codes: students (in thousands) per country, year and level,
# sorted by country, level (alphabetical at this point) and year
foia.c.y.l <- foia.c.z.l.y %>%
  group_by(iso3n, year, student_level) %>%
  summarise(n = sum(n) / 1000) %>%
  ungroup() %>%
  arrange(iso3n, student_level, year)

# turn the level into a factor so the facets of the plot appear in this order
foia.c.y.l$student_level <- factor(
  foia.c.y.l$student_level,
  levels = c("Secondary", "Undergraduate", "Graduate", "Total")
)

# extract the five origin countries with the most students in 2016
# (ranked on the "Total" level); the result is a vector of iso3n codes
top.2016 <- foia.c.y.l %>%
  filter(student_level == "Total", year == 2016, !is.na(n)) %>%
  arrange(desc(n)) %>%
  # head() keeps exactly five rows even if counts tie at rank five, so the
  # five-value color and shape scales of the figure always suffice
  head(5) %>%
  pull(iso3n)
top.2016

# keep only the top countries and attach a country-name column for plotting
foia.p.df <- filter(foia.c.y.l, iso3n %in% top.2016)

# translate the numeric ISO code into an English country name
foia.p.df$cty <- countrycode(foia.p.df$iso3n, "iso3n", "country.name",
                             warn = TRUE)

# use the short label "South Korea" wherever countrycode returned
# "Republic of Korea"
foia.p.df$cty <- ifelse(foia.p.df$cty == "Republic of Korea",
                        "South Korea",
                        foia.p.df$cty)

# label dataframe: one hand-placed text label per country, given as the
# (year, n) position of the label; all labels belong to the "Total" panel
label.df <- tribble(
  ~cty,           ~year,  ~n,
  "Canada",       2015,   25,
  "China",        2013.5, 455,
  "India",        2015,   240,
  "South Korea",  2003.5, 120,
  "Saudi Arabia", 2009.6, 70
)

# same factor levels as the plotted data so the labels facet correctly
label.df$student_level <- factor(
  rep("Total", nrow(label.df)),
  levels = c("Secondary", "Undergraduate", "Graduate", "Total")
)
label.df

# set parameters
axis.title.size <- 14  # base font size; other text sizes are derived from it

# Set1 palette used as a reference for the line colors below; the swatch plot
# is only drawn in interactive sessions so that batch runs do not open an
# extra graphics device
if (interactive()) {
  display.brewer.pal(9, "Set1")
}
brewer.pal(9, "Set1")

# colors and point shapes are named by country, so the manual scales match on
# the country name instead of relying on the alphabetical order of the levels
colors <- c(
  "Canada"       = "#377EB8",
  "China"        = "#E41A1C",
  "India"        = "#FF7F00",
  "Saudi Arabia" = "#4DAF4A",
  "South Korea"  = "black"
)
shapes <- c(
  "Canada"       = 16,
  "China"        = 17,
  "India"        = 15,
  "Saudi Arabia" = 4,
  "South Korea"  = 7
)

# plot: one panel per student level, one line with point markers per country;
# n is in thousands of students
f.stu <- ggplot(foia.p.df,
                aes(x = year,
                    y = n,
                    group = cty,
                    color = cty)) +
  geom_point(aes(shape = cty),
             size = 1.5) +
  geom_line() +
  facet_wrap(~ student_level, nrow = 1) +
  scale_color_manual(values = colors) +
  scale_shape_manual(values = shapes) +
  # country names are written into the panel in place of a legend; mapping the
  # label inside aes() ties each string to its own row of label.df
  geom_text(data = label.df,
            aes(label = cty),
            size = axis.title.size - 11.8) +
  scale_y_continuous("Total Students (thousands)") +
  scale_x_continuous("Year",
                     limits = c(1998.5, 2016.5)) +
  ggtitle("International Students in the U.S., 2000-2016") +
  theme_bw() +
  theme(plot.title = element_text(size = axis.title.size,
                                  face = "bold",
                                  margin = margin(0, 0, 20, 0),
                                  hjust = 0.5),
        axis.title.y = element_text(size = axis.title.size,
                                    margin = margin(0, 20, 0, 0)),
        axis.title.x = element_text(size = axis.title.size,
                                    margin = margin(20, 0, 0, 0)),
        axis.text = element_text(size = axis.title.size - 2),
        strip.text = element_text(size = axis.title.size - 1),
        strip.background = element_blank(),
        panel.grid.minor = element_blank(),
        # the legend is dropped because the lines are labelled directly
        legend.position = "none")

# print: write the figure to Figure-A2.pdf inside MAIN_DIR (8 x 4 inches).
# file.path() on the trimmed directory gives a single separator whether or
# not MAIN_DIR ends in "/".
pdf(file = file.path(sub("[/\\\\]+$", "", MAIN_DIR), "Figure-A2.pdf"),
    width = 8, height = 4)
print(f.stu)
dev.off()

